Part of the possible emulator accuracy issues could be satellite fraction issues. Gonna look at those explicitly.


In [161]:
from pearce.emulator import SpicyBuffalo, LemonPepperWet, OriginalRecipe
from pearce.mocks import cat_dict
import numpy as np
from os import path

In [162]:
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()

In [163]:
#xi gg
training_file = '/scratch/users/swmclau2/xi_zheng07_cosmo_lowmsat/PearceRedMagicXiCosmoFixedNd.hdf5'
#test_file= '/scratch/users/swmclau2/xi_zheng07_cosmo_test_lowmsat2/'
test_file =  '/scratch/users/swmclau2/xi_zheng07_cosmo_test_lowmsat2/PearceRedMagicXiCosmoFixedNd_Test.hdf5'
#xi gm training_file = '/scratch/users/swmclau2/xi_gm_cosmo/PearceRedMagicXiGMCosmoFixedNd.hdf5' test_file = '/scratch/users/swmclau2/xi_gm_cosmo_test2/PearceRedMagicXiGMCosmoFixedNdTest.hdf5'

In [164]:
em_method = 'gp'
split_method = 'random'

In [165]:
a = 1.0
z = 1.0/a - 1.0

In [166]:
scale_bin_centers = np.array([  0.09581734,   0.13534558,   0.19118072,   0.27004994,
                             0.38145568,   0.53882047,   0.76110414,   1.07508818,
                             1.51860241,   2.14508292,   3.03001016,   4.28000311,
                             6.04566509,   8.53972892,  12.06268772,  17.0389993 ,
                            24.06822623,  33.99727318])

In [167]:
bin_idx = 1
fixed_params = {'z':z, 'r': scale_bin_centers[bin_idx]}#, 'cosmo': 0}#, 'r':24.06822623}

In [168]:
np.random.seed(0)
emu = OriginalRecipe(training_file, method = em_method, fixed_params=fixed_params,
                 custom_mean_function = 'linear', downsample_factor = 0.1)

In [169]:
emu.scale_bin_centers


Out[169]:
array([  0.09581734,   0.13534558,   0.19118072,   0.27004994,
         0.38145568,   0.53882047,   0.76110414,   1.07508818,
         1.51860241,   2.14508292,   3.03001016,   4.28000311,
         6.04566509,   8.53972892,  12.06268772,  17.0389993 ,
        24.06822623,  33.99727318])

In [170]:
pred_y, data_y = emu.goodness_of_fit(test_file, statistic = None)

In [171]:
test_x, test_y, test_cov, _ = emu.get_data(test_file, emu.fixed_params)

t, old_idxs  = emu._whiten(test_x)

In [172]:
resmat_flat = 10**pred_y - 10**data_y
datamat_flat = 10**data_y

In [173]:
t_bin = t
acc_bin = np.abs(resmat_flat)/datamat_flat

In [174]:
from pearce.mocks.kittens import TrainingBox

In [175]:
boxno = 0
cat = TrainingBox(boxno, system = 'sherlock')

In [176]:
cat.load(a, HOD='zheng07')

In [ ]:
nd = 1e-4

In [ ]:
hod_pnames = emu.get_param_names()[7:]
mf = cat.calc_mf()

In [ ]:
for pname in hod_pnames:
    print pname, emu.get_param_bounds(pname)

In [ ]:
from scipy.optimize import minimize_scalar
def add_logMmin(hod_params, cat):
    """
    In the fixed number density case, find the logMmin value that will match the nd given hod_params
    :param: hod_params:
        The other parameters besides logMmin
    :param cat:
        the catalog in question
    :return:
        None. hod_params will have logMmin added to it.
    """
    hod_params['logMmin'] = 13.0 #initial guess
    #cat.populate(hod_params) #may be overkill, but will ensure params are written everywhere
    def func(logMmin, hod_params):
        hod_params.update({'logMmin':logMmin})
        return (cat.calc_analytic_nd(hod_params) - nd)**2

    res = minimize_scalar(func, bounds = (12.0, 16.0), args = (hod_params,), options = {'maxiter':100},\
                          method = 'Bounded')

    # assuming this doens't fail
    #print 'logMmin', res.x
    hod_params['logMmin'] = res.x
    #print hod_params

In [ ]:
sat_fracs = np.zeros((1000,))
sat_nd = np.zeros((1000,))
actual_nd = np.zeros_like(sat_fracs)
log_mMins = np.zeros_like(sat_fracs)

for idx, x in enumerate(test_x[:1000, 7:]):
    hod_params = dict(zip(hod_pnames, x))
    add_logMmin(hod_params, cat)
    log_mMins[idx] = hod_params['logMmin']
    sat_hod = cat.calc_hod(hod_params, component='satellite')
    sat_nd[idx] = np.sum(mf*sat_hod)/((cat.Lbox/cat.h)**3)
    #sat_fracs[idx] = sat_nd/nd
    actual_nd[idx] = cat.calc_analytic_nd(hod_params)
    
sat_fracs = sat_nd/actual_nd

In [ ]:
plt.hist(sat_fracs)

In [ ]:
sat_fracs.mean()

In [ ]:
sat_fracs.std()

In [ ]:
plt.hist(log_mMins)

In [ ]:
hod_pnames

In [ ]:
plt.scatter(test_x[:1000, 9], acc_bin[:1000])

In [ ]:
test_x[:5000,0]

In [ ]:
pnames = emu.get_param_names()
for i in xrange(7):
    for j in xrange(7):
        mean_acc = np.mean(acc_bin[j*5000:(j+1)*5000])
        plt.scatter(test_x[j*5000, i], mean_acc, label = 'Cosmo %d'%j)
    plt.xlabel(pnames[i])
    plt.ylabel('Avg. Percent Accurate')
    plt.title('r = %.2f'%scale_bin_centers[bin_idx])

    plt.legend(loc = 'best')
    plt.show()

In [ ]:
test_x[0*35::1000, 9]

In [ ]:
pnames = emu.get_param_names()
for i in xrange(7,11):
    for j in xrange(0,1000):
        mean_acc = np.mean(acc_bin[j::1000])
        plt.scatter(test_x[j, i], mean_acc, label = 'HOD %d'%j, alpha = 0.6)
    plt.xlabel(pnames[i])
    plt.ylabel('Avg. Percent Accurate')
    plt.title('r = %.2f'%scale_bin_centers[bin_idx])
    #plt.legend(loc = 'best')
    plt.show()

In [ ]:
mcut = 13.5
sub_test_idx = np.logical_and(test_x[:, 9]>mcut, test_x[:, 7] < mcut)
print np.mean(acc_bin[sub_test_idx]), np.sum(sub_test_idx)

In [ ]:
plt.scatter(test_x[:1000, 9], sat_fracs)
plt.xlabel('logM1')
plt.ylabel('Sat Frac')

In [ ]:
plt.scatter(test_x[:1000, 9], log_mMins)
plt.xlabel('logM1')
plt.ylabel('logMmin')

In [ ]:
plt.hist(1e4*(actual_nd-nd) )

In [ ]:
plt.scatter(test_x[:1000, 9], 1e4*(actual_nd-nd) )
plt.xlabel('logM1')
plt.ylabel('Actual nd - Fixed nd')

In [ ]:
good_nd_idxs = np.isclose(actual_nd, nd)
print np.sum(good_nd_idxs)/1000.

In [ ]:
# NOTE sat_fracs uses actual_nd, so these is a weird selection
good_satfrac_idxs = np.logical_and(0.1 < sat_fracs, sat_fracs < 0.5)
print np.sum(good_satfrac_idxs)/1000.

In [ ]:
print np.sum(np.logical_and(good_satfrac_idxs, good_nd_idxs))/1000.

In [ ]: